Tutorial from RASA NLU.
MITIE: For corpus
pip install git+https://github.com/mit-nlp/MITIE.git
spaCy: For corpus
pip install -U spacy
Rasa NLU: For NLP programming
pip install rasa_nlu
Flask: For API/Web Service support
pip install flask
Gevent: For coroutines with the API
pip install gevent
In [1]:
# INFO:root:Configuration: {
# "num_threads": 1,
# "log_level": 20,
# "language": "en",
# "mitie_file": "data/total_word_feature_extractor.dat",
# "emulate": null,
# "config": "config.json",
# "response_log": "/Users/flavio.clesio/logs",
# "server_model_dirs": null,
# "fine_tune_spacy_ner": false,
# "token": null,
# "path": "/Users/flavio.clesio/models",
# "log_file": null,
# "data": null,
# "port": 5000,
# "backend": "mitie"
# }
# INFO:root:Logging requests to '/Users/flavio.clesio/logs/rasa_nlu_log-20170423-110825-12766.log'.
# INFO:root:Started http server on port 5000
# 127.0.0.1 - - [2017-04-23 11:08:39] "GET /parse?q=hello HTTP/1.1" 200 193 0.002418
# 127.0.0.1 - - [2017-04-23 11:08:40] "GET /favicon.ico HTTP/1.1" 404 342 0.020442
In [2]:
# Training to recognize 4 categories: 'greet', 'restaurant_search', 'affirm', 'goodbye'
# Train classifier
# extracting text features
# now do training
# num training samples: 51
# C: 200 f-score: 0.84
# C: 400 f-score: 0.84
# C: 300 f-score: 0.84
# C: 100 f-score: 0.84
# C: 0.01 f-score: 0.74
# C: 600 f-score: 0.84
# C: 1400 f-score: 0.84
# C: 3000 f-score: 0.84
# C: 5000 f-score: 0.84
# C: 2550 f-score: 0.84
# C: 1325 f-score: 0.84
# C: 712.5 f-score: 0.84
# C: 406.25 f-score: 0.84
# C: 253.125 f-score: 0.84
# C: 176.562 f-score: 0.84
# C: 138.281 f-score: 0.84
# C: 119.141 f-score: 0.84
# C: 109.57 f-score: 0.84
# C: 104.785 f-score: 0.84
# C: 102.393 f-score: 0.84
# C: 101.196 f-score: 0.84
# C: 100.598 f-score: 0.84
# C: 100.299 f-score: 0.84
# best C: 100.598
# test on train:
# 10 0 0 0
# 0 17 0 0
# 0 0 14 0
# 0 0 0 10
# overall accuracy: 1
# Training time: 535 seconds.
# df.number_of_classes(): 4
# Training to recognize 2 labels: 'location', 'cuisine'
# Part I: train segmenter
# words in dictionary: 200000
# num features: 271
# now do training
# C: 20
# epsilon: 0.01
# num threads: 1
# cache size: 5
# max iterations: 2000
# loss per missed segment: 3
# C: 20 loss: 3 1
# C: 35 loss: 3 1
# C: 20 loss: 4.5 1
# C: 5 loss: 3 1
# C: 20 loss: 1.5 0.888889
# C: 20 loss: 3.75 1
# C: 21.5 loss: 3 1
# C: 20 loss: 3.375 1
# C: 18.5 loss: 3 1
# C: 20 loss: 3.1875 1
# C: 20 loss: 2.9 1
# C: 20 loss: 2.9 1
# C: 20 loss: 2.97801 1
# best C: 20
# best loss: 3
# num feats in chunker model: 4095
# train: precision, recall, f1-score: 1 1 1
# Part I: elapsed time: 1 seconds.
# Part II: train segment classifier
# now do training
# num training samples: 9
# C: 200 f-score: 1
# C: 400 f-score: 1
# C: 300 f-score: 1
# C: 100 f-score: 1
# C: 0.01 f-score: 0.875
# C: 600 f-score: 1
# C: 1400 f-score: 1
# C: 3000 f-score: 1
# C: 5000 f-score: 1
# C: 2550 f-score: 1
# C: 1325 f-score: 1
# C: 712.5 f-score: 1
# C: 406.25 f-score: 1
# C: 253.125 f-score: 1
# C: 176.562 f-score: 1
# C: 138.281 f-score: 1
# C: 119.141 f-score: 1
# C: 109.57 f-score: 1
# C: 104.785 f-score: 1
# C: 102.393 f-score: 1
# C: 101.196 f-score: 1
# C: 100.598 f-score: 1
# C: 100.299 f-score: 1
# best C: 100.598
# test on train:
# 4 0
# 0 5
# overall accuracy: 1
# Part II: elapsed time: 9 seconds.
# df.number_of_classes(): 2
In [3]:
# % Total % Received % Xferd Average Speed Time Time Time Current
# Dload Upload Total Spent Left Speed
# 100 279 100 242 100 37 10041 1535 --:--:-- --:--:-- --:--:-- 10521
# {
# "confidence": 0.6487632074644808,
# "entities": [
# {
# "end": 24,
# "entity": "cuisine",
# "start": 17,
# "value": "Chinese"
# }
# ],
# "intent": "restaurant_search",
# "text": "I am looking for Chinese food"
# }